package com.algo.dict;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Dictionary backed by the Chinese word list of SCWS, a word segmentation tool. <br/>
 * The dictionary file is tab-separated with one entry per line, <br/>
 * formatted as below: <br/>
 * 		# WORD	TF	IDF	ATTR <br/>
 * 		帕克蒂亚	11.26	14.12	nr <br/>
 * 		雁门	13.33	14.19	ns <br/>
 * 		...
 * 
 * @author lujianfeng@miaozhen.com
 *
 */

public class SCWSDictionary implements Dictionary{
	
	/** Column separator of the dictionary file; compiled once because it is applied to every line. */
	private static final Pattern TAB = Pattern.compile("\\t");
	
	/** Number of tab-separated columns in a complete entry: WORD, TF, IDF, ATTR. */
	private static final int COLUMN_COUNT = 4;
	
	/** Maps each word (column 0 of the file) to its ATTR value (column 3 of the file). */
	private final Map<String, String> dict;
	
	/**
	 * Loads the dictionary file at {@code path} into memory.
	 * <p>
	 * The file is decoded as UTF-8 regardless of the platform default charset.
	 * The first line is always skipped (it is expected to be the column header).
	 * Every following line is split on tab characters; lines that yield fewer
	 * than four columns (for example blank lines) are ignored. When the same
	 * word occurs more than once, the last occurrence wins.
	 * 
	 * @param path location of the dictionary file
	 * @throws RuntimeException wrapping the {@link IOException} if the file
	 *         cannot be opened or read
	 */
	public SCWSDictionary(String path){
		dict = new HashMap<String, String>();
		try {
			BufferedReader br = new BufferedReader(
					new InputStreamReader(new FileInputStream(path), "UTF-8"));
			try {
				br.readLine(); //skip first line
				String line;
				while((line = br.readLine()) != null){
					String[] items = TAB.split(line);
					// Blank or truncated lines have no ATTR column to store.
					if(items.length < COLUMN_COUNT)	continue;
					dict.put(items[0], items[3]);
				}
			} finally {
				// Release the file handle even when reading or parsing fails.
				br.close();
			}
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Tells whether {@code word} was loaded from the dictionary file.
	 * 
	 * @param word the word to look up
	 * @return {@code true} if the word is a key of the loaded dictionary
	 */
	public boolean inDictionary(String word) {
		return dict.containsKey(word);
	}

	/**
	 * Returns the ATTR column stored for {@code word}.
	 * 
	 * @param word the word to look up
	 * @return the ATTR value of the word, or the inherited {@code UNDEFINED}
	 *         constant when the word is not in the dictionary
	 */
	public String getPOS(String word) {
		String value = dict.get(word);
		if(value == null)	return UNDEFINED;
		return value;
	}

	/**
	 * Manual smoke test: loads a dictionary from a hard-coded local path and
	 * prints the result of one membership lookup and one ATTR lookup.
	 * 
	 * @param args unused
	 */
	public static void main(String[] args) {
		//for test
		Dictionary dict = new SCWSDictionary("E://test//dict.utf8");
		System.out.println(dict.inDictionary("中华人民共和国"));
		System.out.println(dict.getPOS("澳门"));
	}

}
